creating ranks

the ranks will be a named num, with entrezgene_id as name and stat (Wald Test) as metric

getRanks <- function(res, annot) {
  # only taking genes which have entrezgene_ids assigned to them
  genes_with_entrez <- select(annot, GeneID, entrezgene_id) %>% 
    filter(!is.na(entrezgene_id))
  
  ranks <- as.data.frame(res) %>%
    tibble::rownames_to_column("GeneID") %>%
    merge(genes_with_entrez, by = "GeneID") %>%
    arrange(desc(stat)) %>% 
    select(entrezgene_id, stat) %>% 
    tibble::deframe() # creating a named num from two columns
  return(ranks)
}

ranks.gastroc <- getRanks(res.gastroc, annot)
ranks.soleus <- getRanks(res.soleus, annot)

barplots of ranks

loading pathways

Pathways are provided by http://www.gsea-msigdb.org/gsea/msigdb/mouse/collections.jsp

For now the Canonical pathways are used. These gene sets represent biological a biological process. They are composed from the following databases taking a subset of CP:

database gene sets
BioCarta 252
Reactome 1249
WikiPathways 186

applying fgsea

Here, using maxSize = 200.

fgseaRes <- fgsea(
  pathways = CGP,
  stats    = ranks,
  minSize  = 15,
  maxSize  = 200 # (see above the chunk if other value was used)
)

Enrichment score plot

ordering pathways by padj values and using ES to

# ' obtain top pathways ordered by padj and use `ES` for up or down regulation
get_top_pathways <- function(fgseaRes, up = TRUE, pCutoff=0.01, n=10) {
  .updown <- ifelse(up, `>`, `<`)
  
  top.pathways <- fgseaRes %>%
    filter(.updown(ES,0), padj < pCutoff) %>%
    arrange(padj) %>% 
    slice_head(n=n)
  
 return(top.pathways) 
}

plot for top up and down regulated pathways

# ' plots top n enrichment plots for the given fgsea result
plot_top_enrichment <- function(fgseaRes, pathways, ranks, n = 9, up = TRUE) {
  # extracting the top n pathways
  top.pathways <- get_top_pathways(fgseaRes, up=up, pCutoff=params$pCutoff, n=n)
  
  plot.list <- list()
  # lims <- list("x" = c(0,17000), "y" = c(-0.8,0.0))
  
  for (i in 1:nrow(top.pathways)) {
    # filling plot.list with enrichmentPlots 
    # TODO: how can I use facet_wrap for this?
    pathway <- top.pathways[i]$pathway
    plt <- plotEnrichment(pathways[[pathway]], ranks) +
      # TODO: adjust yaxis to the same scale
      # TODO: keep axis.text.x only on the lower row
      # TODO: keep axis.text.y only on the right column
      theme(
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
      ) # +
      # coord_cartesian(xlim = lims$x, ylim = lims$y)
    plot.list[[i]] <- plt
  }
    
  arrange_plts(plot.list)
}

# ' helper function to arragen the plot from the enrichment
arrange_plts <- function(plt.list) {
  nplts <- length(plt.list)
  plt <- plt.list[1]
  xlab <- plt$labels$x
  ylab <- plt$labels$y
  
  # set axis to the same scale
  lims <- list("x" = c(0, 17000), "y" = c(-0.8, 0.0))
  
  # remove axis
  
  # arrange the plots
  fig_labels <- LETTERS[1:nplts]
  
  figure <- ggpubr::ggarrange(plotlist = plt.list,
                              labels = fig_labels) %>%
    annotate_figure(left = text_grob(ylab, rot = 90),
                    bottom = text_grob(xlab))
  
  return(figure)
}



# plot_labels <-
#     data.frame("label" = LETTERS[1:10], "pathway" = top.pathways)
# knitr::kable(caption = "plot labels", plot_labels)

gastroc up

plot_top_enrichment(fgseaRes.gastroc, CGP, ranks.gastroc, up=T)

# TODO: add plot labels to return argument of plot_top_enrichment (use list probably)
plot_labels <-
    data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.gastroc, up=T, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
plot labels
label pathway
A WP_TYROBP_CAUSAL_NETWORK_IN_MICROGLIA
B WP_MICROGLIA_PATHOGEN_PHAGOCYTOSIS_PATHWAY
C WP_APOPTOSIS
D WP_FIBRIN_COMPLEMENT_RECEPTOR_3_SIGNALING_PATHWAY
E REACTOME_IMMUNOREGULATORY_INTERACTIONS_BETWEEN_A_LYMPHOID_AND_A_NON_LYMPHOID_CELL
F BIOCARTA_TNFR2_PATHWAY
G WP_CHEMOKINE_SIGNALING_PATHWAY
H REACTOME_ANTIMICROBIAL_PEPTIDES
I REACTOME_FCGAMMA_RECEPTOR_FCGR_DEPENDENT_PHAGOCYTOSIS

gastroc down

plot_top_enrichment(fgseaRes.gastroc, CGP, ranks.gastroc, up=F)

plot_labels <-
    data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.gastroc, up=F, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
plot labels
label pathway
A REACTOME_THE_CITRIC_ACID_TCA_CYCLE_AND_RESPIRATORY_ELECTRON_TRANSPORT
B REACTOME_RESPIRATORY_ELECTRON_TRANSPORT_ATP_SYNTHESIS_BY_CHEMIOSMOTIC_COUPLING_AND_HEAT_PRODUCTION_BY_UNCOUPLING_PROTEINS
C REACTOME_RESPIRATORY_ELECTRON_TRANSPORT
D WP_ELECTRON_TRANSPORT_CHAIN
E REACTOME_COMPLEX_I_BIOGENESIS
F REACTOME_MITOCHONDRIAL_TRANSLATION
G REACTOME_KEAP1_NFE2L2_PATHWAY
H REACTOME_CELLULAR_RESPONSE_TO_HYPOXIA
I REACTOME_CELLULAR_RESPONSE_TO_CHEMICAL_STRESS

soleus up

plot_top_enrichment(fgseaRes.soleus, CGP, ranks.soleus, up=T)

plot_labels <-
    data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.soleus, up=T, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
plot labels
label pathway
A REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE
B REACTOME_FORMATION_OF_A_POOL_OF_FREE_40S_SUBUNITS
C WP_CYTOPLASMIC_RIBOSOMAL_PROTEINS
D REACTOME_NONSENSE_MEDIATED_DECAY_NMD_INDEPENDENT_OF_THE_EXON_JUNCTION_COMPLEX_EJC
E WP_TYROBP_CAUSAL_NETWORK_IN_MICROGLIA
F REACTOME_EUKARYOTIC_TRANSLATION_INITIATION
G REACTOME_NONSENSE_MEDIATED_DECAY_NMD
H REACTOME_MAJOR_PATHWAY_OF_RRNA_PROCESSING_IN_THE_NUCLEOLUS_AND_CYTOSOL
I REACTOME_INTERLEUKIN_7_SIGNALING

soleus down

plot_top_enrichment(fgseaRes.soleus, CGP, ranks.soleus, up=F)

plot_labels <-
    data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.soleus, up=F, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
plot labels
label pathway
A REACTOME_KEAP1_NFE2L2_PATHWAY
B REACTOME_CELLULAR_RESPONSE_TO_CHEMICAL_STRESS
C REACTOME_ASYMMETRIC_LOCALIZATION_OF_PCP_PROTEINS
D REACTOME_GLI3_IS_PROCESSED_TO_GLI3R_BY_THE_PROTEASOME
E REACTOME_ACTIVATION_OF_APC_C_AND_APC_C_CDC20_MEDIATED_DEGRADATION_OF_MITOTIC_PROTEINS
F REACTOME_UBIQUITIN_MEDIATED_DEGRADATION_OF_PHOSPHORYLATED_CDC25A
G REACTOME_UCH_PROTEINASES
H REACTOME_REGULATION_OF_MRNA_STABILITY_BY_PROTEINS_THAT_BIND_AU_RICH_ELEMENTS
I REACTOME_RUNX1_REGULATES_TRANSCRIPTION_OF_GENES_INVOLVED_IN_DIFFERENTIATION_OF_HSCS

GSEA table plot

gastroc

top significant pathways:

# creating up and down regulated pathway vectors separately to maintain order

topUp <- get_top_pathways(fgseaRes.gastroc, up=T, pCutoff = params$pCutoff, n=10)
topDown <- get_top_pathways(fgseaRes.gastroc, up=F, pCutoff = params$pCutoff, n=10)
topPathways <- bind_rows(topUp, topDown) %>%
  arrange(-NES) %>%
  pull(pathway)

plotGseaTable(
  pathways = CGP[topPathways],
  stats = ranks.gastroc,
  fgseaRes = fgseaRes.gastroc,
  gseaParam = 0.5,
  render = TRUE
) %>%
  ggpubr::as_ggplot() # needed since, for whatever reason only `NULL` gets returned if `plotGseaTable` is rendered inline

soleus

top significant pathways:

# creating up and down regulated pathway vectors separately to maintain order

topUp <- get_top_pathways(fgseaRes.soleus, up=T, pCutoff = params$pCutoff, n=10)
topDown <- get_top_pathways(fgseaRes.soleus, up=F, pCutoff = params$pCutoff, n=10)
topPathways <- bind_rows(topUp, topDown) %>%
  arrange(-NES) %>%
  pull(pathway)

plotGseaTable(
  pathways = CGP[topPathways],
  stats = ranks.soleus,
  fgseaRes = fgseaRes.soleus,
  gseaParam = 0.5,
  render = TRUE
) %>%
  ggpubr::as_ggplot() # needed since, for whatever reason only `NULL` gets returned if `plotGseaTable` is rendered inline

most differential regulated pathways, both tissues

using NES from the fgsea result filtering on the set pCutoff=0.01 yields the following plot:

prepare_result <- function(fgseaRes, colname="NES", pCutoff) {
  fgseaRes.prep <- fgseaRes %>%
    data.frame() %>%
    filter(padj < pCutoff) %>%
    dplyr::rename(!!colname := NES) %>%
    dplyr::select(!!colname, pathway)
  return(fgseaRes.prep)
}

prep.gastroc <- prepare_result(fgseaRes.gastroc, colname="gastroc", params$pCutoff)
prep.soleus  <- prepare_result(fgseaRes.soleus,  colname="soleus", params$pCutoff)


# combining wald-Test data from both tissues and ordering in quadrants
res.combined <- merge(prep.gastroc,
                      prep.soleus,
                      by = "pathway") %>%
  mutate(diff.exp = case_when(
    gastroc < 0 & soleus < 0 ~ "both down",
    gastroc > 0 & soleus > 0 ~ "both up",
    gastroc < 0 & soleus > 0 ~ "ga down, sol up",
    gastroc > 0 & soleus < 0 ~ "ga up, sol down",
    TRUE ~ "different"
  ))

# only annotating top_n pathways by: 
# removing all pathway_names except the top_n_pathways (sum of absolute NES numbers)
# top_n_pathways <- 10
# top_labels <- res.combined %>%
#   group_by(diff.exp) %>% 
#   arrange(desc(abs(gastroc) + abs(soleus))) %>%
#   filter(row_number() %in% c(1:top_n_pathways)) %>% 
#   ungroup() %>% 
#   .$pathway

# res.combined <- res.combined %>% 
#   mutate(pathway = ifelse(pathway %in% top_labels, pathway, ""))

# final plot
p <- ggplot(res.combined, aes(x = gastroc, y = soleus, text=pathway)) +
  geom_vline(xintercept = 0) + 
  geom_hline(yintercept = 0) + 
  geom_point(aes(color = diff.exp)) +
  # scale_color_manual(values = c("red", "chartreuse1", "bisque", "royalblue")) +
  labs(x = "gastroc", y = "soleus") +
  # ggrepel::geom_label_repel(max.overlaps = 20) + 
  ggtitle(label = "NES")

plotly::ggplotly(p, tooltip = "all")

barplot

ggplot(res.combined, aes(x = diff.exp)) +
  geom_bar(aes(fill = diff.exp))